Data Visualization Project 02

library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 4.0.3
## -- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(maps)
## Warning: package 'maps' was built under R version 4.0.3
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.3
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Manual colour palette (13 hex colours) passed to scale_color_manual()
# by the scatter plots below.
cb_palette <- c(
  "#88CCEE", "#CC6677", "#DDCC77", "#117733",
  "#332288", "#AA4499", "#44AA99", "#999933",
  "#882255", "#661100", "#6699CC", "#888888",
  "#000000"
)
# Load the 2017 marathon results; the path is relative to this document.
marathon_2017 <- read.csv(file = "../data/marathon_results_2017.csv")
# Interactive scatter plot of Age against the Gender column (shown on the
# x axis as 'Position') for the rows of marathon_2017 with Gender < 101 and
# the requested M.F value. Points are coloured by Country using cb_palette.
#
# sex_code:  value of the M.F column to keep, e.g. 'M' or 'F'.
# sex_label: word inserted into the plot title, e.g. 'Male' or 'Female'.
#
# Returns the plotly widget; calling it at top level displays the plot.
plot_age_by_position <- function(sex_code, sex_label) {
  top_finishers <- marathon_2017 %>%
    filter(Gender < 101 & M.F == sex_code)

  # ggplot() silently ignores extra arguments such as alpha, so the former
  # `alpha = 1` had no effect and is not passed any more.
  scatter <- ggplot(top_finishers, aes(x = Gender, y = Age)) +
    geom_point(aes(color = Country), shape = 21) +
    scale_color_manual(values = cb_palette) +
    theme_classic()

  # One layout() call sets the legend position, the title and the axis label.
  ggplotly(scatter) %>%
    layout(
      legend = list(orientation = 'h', y = -0.3),
      title = paste('Age and Position on', sex_label, 'Marathon 2017'),
      xaxis = list(title = 'Position')
    )
}

plot_age_by_position('M', 'Male')
plot_age_by_position('F', 'Female')
# Pace of the rows with Gender < 101, drawn as one trace per M.F value.
# spread() turns the M.F values into columns (M and F) holding the Gender
# value, so each trace can use its own column as the x axis.
local({
  top_100_wide <- spread(filter(marathon_2017, Gender < 101), M.F, Gender)

  plot_ly(top_100_wide) %>%
    add_trace(
      name = "Male",
      x = ~M,
      y = ~Pace,
      text = ~Name,
      type = 'scatter',
      mode = 'lines+markers'
    ) %>%
    add_trace(
      name = "Female",
      x = ~F,
      y = ~Pace,
      text = ~Name,
      type = 'scatter',
      mode = 'lines+markers'
    ) %>%
    layout(
      title = 'Pace comparison between male and female first 100 positions on marathon 2017',
      xaxis = list(title = 'Position')
    )
})
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Pairwise plot matrix of the Overall, Gender and Age columns.
ggpairs(select(marathon_2017, Overall, Gender, Age))

# Country lookup table; the path is relative to this document.
countries <- read.csv(file = "../data/countries.csv")

# World map polygons from the maps package, without Antarctica.
WorldData <- fortify(filter(map_data("world"), region != "Antarctica"))

# Reshape `countries` into a single wide row: one column per upper-cased
# alpha3 code, holding the matching country name.
spread_countries <- local({
  # One column per alpha3 code, filled with the country name.
  codes_wide <- spread(countries, alpha3, name)
  # Keep only columns 3 onward.
  # NOTE(review): presumably the first two are the non-code columns left
  # over by spread() -- confirm against countries.csv.
  codes_wide <- codes_wide[, 3:ncol(codes_wide)]

  # Back to long key/value form, dropping the NA cells created by spread().
  codes_long <- gather(codes_wide)
  codes_long <- group_by(codes_long, key)
  codes_long <- subset(codes_long, !is.na(value))
  codes_long <- mutate(codes_long, key = toupper(key))

  # Final wide form, with upper-case codes as column names.
  spread(codes_long, key, value)
})
spread_countries
## # A tibble: 1 x 193
##   AFG    AGO   ALB   AND   ARE   ARG   ARM   ATG   AUS   AUT   AZE   BDI   BEL  
##   <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Afgha~ Ango~ Alba~ Ando~ Unit~ Arge~ Arme~ Anti~ Aust~ Aust~ Azer~ Buru~ Belg~
## # ... with 180 more variables: BEN <chr>, BFA <chr>, BGD <chr>, BGR <chr>,
## #   BHR <chr>, BHS <chr>, BIH <chr>, BLR <chr>, BLZ <chr>, BOL <chr>,
## #   BRA <chr>, BRB <chr>, BRN <chr>, BTN <chr>, BWA <chr>, CAF <chr>,
## #   CAN <chr>, CHE <chr>, CHL <chr>, CHN <chr>, CIV <chr>, CMR <chr>,
## #   COD <chr>, COG <chr>, COL <chr>, COM <chr>, CPV <chr>, CRI <chr>,
## #   CUB <chr>, CYP <chr>, CZE <chr>, DEU <chr>, DJI <chr>, DMA <chr>,
## #   DNK <chr>, DOM <chr>, DZA <chr>, ECU <chr>, EGY <chr>, ERI <chr>,
## #   ESP <chr>, EST <chr>, ETH <chr>, FIN <chr>, FJI <chr>, FRA <chr>,
## #   FSM <chr>, GAB <chr>, GBR <chr>, GEO <chr>, GHA <chr>, GIN <chr>,
## #   GMB <chr>, GNB <chr>, GNQ <chr>, GRC <chr>, GRD <chr>, GTM <chr>,
## #   GUY <chr>, HND <chr>, HRV <chr>, HTI <chr>, HUN <chr>, IDN <chr>,
## #   IND <chr>, IRL <chr>, IRN <chr>, IRQ <chr>, ISL <chr>, ISR <chr>,
## #   ITA <chr>, JAM <chr>, JOR <chr>, JPN <chr>, KAZ <chr>, KEN <chr>,
## #   KGZ <chr>, KHM <chr>, KIR <chr>, KNA <chr>, KOR <chr>, KWT <chr>,
## #   LAO <chr>, LBN <chr>, LBR <chr>, LBY <chr>, LCA <chr>, LIE <chr>,
## #   LKA <chr>, LSO <chr>, LTU <chr>, LUX <chr>, LVA <chr>, MAR <chr>,
## #   MCO <chr>, MDA <chr>, MDG <chr>, MDV <chr>, MEX <chr>, MHL <chr>, ...
# World map shaded by the number of marathon_2017 rows per country.
#
# The previous version filled by the Gender column and used City as map_id,
# which does not match the region names of WorldData and does not show the
# "Number of athletes" promised by the legend.

# Count athletes per Country code and translate the code to a country name
# through `countries` (alpha3 is upper-cased before matching).
# NOTE(review): assumes marathon_2017$Country holds alpha-3 style codes and
# that the names in countries.csv match WorldData$region; countries for
# which either does not hold stay white on the map -- confirm and patch the
# names if needed (e.g. "United States" vs "USA").
athletes_by_country <- marathon_2017 %>%
  count(Country, name = "athletes") %>%
  inner_join(
    countries %>% transmute(Country = toupper(alpha3), region = name),
    by = "Country"
  )

ggplot() +
  # Base layer: every country outlined on a white fill.
  geom_map(
    data = WorldData, map = WorldData,
    aes(map_id = region),
    fill = "white", colour = "#7f7f7f", size = 0.5
  ) +
  # Data layer: countries with athletes, shaded by the count.
  geom_map(
    data = athletes_by_country, map = WorldData,
    aes(fill = athletes, map_id = region),
    colour = "#7f7f7f", size = 0.5
  ) +
  # geom_map() has no x/y aesthetics (mapping them triggers an "unknown
  # aesthetics" warning), so the axis ranges come from the map coordinates.
  expand_limits(x = WorldData$long, y = WorldData$lat) +
  coord_map("rectangular", lat0 = 0, xlim = c(-180, 180), ylim = c(-60, 90)) +
  theme_bw() +
  ggtitle("Athletes by country") +
  labs(fill = "Number of athletes", x = '', y = '')
## Warning: Ignoring unknown aesthetics: x, y